package com.common.spider.spiderintroduction;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Small jsoup walkthrough: parses an inline HTML snippet, picks out the blog
 * title blocks with CSS-style selectors, and prints the titles and link targets.
 */
public class JsoupDemo {

    /**
     * Parses the sample markup, collects the text and the {@code href} of every
     * selected title block, and writes the results to standard output.
     *
     * <p>Each title is printed twice: once while it is being extracted and once
     * more when the collected list is dumped afterwards.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Sample page: three title blocks inside the "blog_list" container.
        // Note that the first block carries the class "blog_title1".
        final String html = "<html><div id=\"blog_list\">"
                + "<div class=\"blog_title1\"><a href=\"url1\">第一篇博客</a></div>"
                + "<div class=\"blog_title\"><a href=\"url2\">第二篇博客</a></div>"
                + "<div class=\"blog_title\"><a href=\"url3\">第三篇博客</a></div>"
                + "</div></html>";

        Document page = Jsoup.parse(html);

        // Narrow down in two steps: first the list container, then the title blocks.
        // [class=blog_title] compares the attribute value itself, so the block whose
        // class is "blog_title1" is not part of the selection.
        Elements listContainer = page.select("div[id=blog_list]");
        Elements titleBlocks = listContainer.select("div[class=blog_title]");

        List<String> titles = new ArrayList<>();
        List<String> links = new ArrayList<>();
        for (int i = 0; i < titleBlocks.size(); i++) {
            Element block = titleBlocks.get(i);

            String title = block.text();
            System.out.println(title);
            titles.add(title);

            // attr(key) looks up the attribute named by the key and returns its value.
            String href = block.select("a").attr("href");
            links.add(href);
        }

        printEach(titles);
        printEach(links);

        System.out.println("============================================");
        System.out.println(html);
    }

    /** Writes every entry of the given list to standard output, one per line. */
    private static void printEach(List<String> lines) {
        for (String line : lines) {
            System.out.println(line);
        }
    }
}
